{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# 9 Pandas分组与聚合"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key1   key2     data1     data2\n",
      "0    a    one  1.331587  0.004291\n",
      "1    b    one  0.715279 -0.174600\n",
      "2    a    two -1.545400  0.433026\n",
      "3    b  three -0.008384  1.203037\n",
      "4    a    two  0.621336 -0.965066\n",
      "5    b    two -0.720086  1.028274\n",
      "6    a    one  0.265512  0.228630\n",
      "7    a  three  0.108549  0.445138\n",
      "object\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "np.random.seed(10)\n",
    "dict_obj = {'key1' : ['a', 'b', 'a', 'b',\n",
    "                      'a', 'b', 'a', 'a'],\n",
    "            'key2' : ['one', 'one', 'two', 'three',\n",
    "                      'two', 'two', 'one', 'three'],\n",
    "            'data1': np.random.randn(8),\n",
    "            'data2': np.random.randn(8)}\n",
    "\n",
    "df_obj = pd.DataFrame(dict_obj)\n",
    "\n",
    "print(df_obj)\n",
    "print(df_obj.loc[:,\"key1\"].dtype)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.408242200Z",
     "start_time": "2024-05-04T05:58:16.400199800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 按照df的列进行分组"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000025ACAA7FC10>\n",
      "<class 'pandas.core.groupby.generic.DataFrameGroupBy'>\n"
     ]
    }
   ],
   "source": [
    "# dataframe根据key1进行分组，\n",
    "# 分组后的对象类型，如果不进行聚合操作拿到的只是groupby的对象\n",
    "print(df_obj.groupby('key1'))\n",
    "print(type(df_obj.groupby('key1')))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T06:04:03.107102200Z",
     "start_time": "2024-05-04T06:04:03.086323600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key1\n",
      "a    5\n",
      "b    3\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "      data1  data2\n",
      "key1              \n",
      "a         5      5\n",
      "b         3      3\n",
      "--------------------------------------------------\n",
      "a\n",
      "  key1  data1  data2\n",
      "0    a      5      7\n",
      "2    a      2      4\n",
      "4    a      2      5\n",
      "6    a      1      9\n",
      "7    a      9      2\n",
      "--------------------------------------------------\n",
      "b\n",
      "  key1  data1  data2\n",
      "1    b      1      5\n",
      "3    b      1      1\n",
      "5    b      9      7\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "group1=df_obj.groupby('key1')   # group1是一个可迭代对象\n",
    "print(group1.size())\n",
    "print(\"-\"*50)\n",
    "\n",
    "print(group1.count())\n",
    "print(\"-\"*50)\n",
    "\n",
    "for name,group in group1: # 遍历分组\n",
    "    print(name)\n",
    "    print(group)\n",
    "    print(\"-\"*50)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T06:04:04.724631400Z",
     "start_time": "2024-05-04T06:04:04.677414700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    5\n",
      "1    1\n",
      "2    2\n",
      "3    1\n",
      "4    2\n",
      "5    9\n",
      "6    1\n",
      "7    9\n",
      "Name: data1, dtype: int32\n",
      "--------------------------------------------------\n",
      "<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000025ACBB2FC40>\n",
      "<class 'pandas.core.groupby.generic.SeriesGroupBy'>\n",
      "--------------------------------------------------\n",
      "a\n",
      "0    5\n",
      "2    2\n",
      "4    2\n",
      "6    1\n",
      "7    9\n",
      "Name: data1, dtype: int32\n",
      "--------------------------------------------------\n",
      "b\n",
      "1    1\n",
      "3    1\n",
      "5    9\n",
      "Name: data1, dtype: int32\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "print(df_obj['data1'])\n",
    "# dataframe的 data1 列根据 key1 进行分组\n",
    "print('-'*50)\n",
    "\n",
    "print(df_obj['data1'].groupby(df_obj['key1']))\n",
    "print(type(df_obj['data1'].groupby(df_obj['key1'])))\n",
    "print('-'*50)\n",
    "\n",
    "for i , group in df_obj['data1'].groupby(df_obj['key1']):\n",
    "    # print(i)\n",
    "    # print(type(i))    # 如果不拆开，则是tuple\n",
    "    print(i)\n",
    "    print(group)\n",
    "    print('-'*50)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T06:05:30.503279700Z",
     "start_time": "2024-05-04T06:05:30.448250400Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 先提取要用的特征再分组，最后聚合运算"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "0    5\n",
      "1    1\n",
      "2    2\n",
      "3    1\n",
      "4    2\n",
      "5    9\n",
      "6    1\n",
      "7    9\n",
      "Name: data1, dtype: int32\n",
      "--------------------------------------------------\n",
      "key1\n",
      "a    3.800000\n",
      "b    3.666667\n",
      "Name: data1, dtype: float64\n",
      "--------------------------------------------------\n",
      "   data1  data2\n",
      "0      5      7\n",
      "1      1      5\n",
      "2      2      4\n",
      "3      1      1\n",
      "4      2      5\n",
      "5      9      7\n",
      "6      1      9\n",
      "7      9      2\n",
      "--------------------------------------------------\n",
      "         data1     data2\n",
      "key1                    \n",
      "a     3.800000  5.400000\n",
      "b     3.666667  4.333333\n"
     ]
    }
   ],
   "source": [
    "grouped1 = df_obj.groupby('key1')\n",
    "#print(grouped1.mean()) # 有不是数值类型的，则不能算均值\n",
    "print('-'*50)\n",
    "\n",
    "# 提取出data1列作为一个Series，对这个Series进行a、b分组，再计算data1在a、b组的均值\n",
    "print(df_obj['data1'])  # Series\n",
    "print('-'*50)\n",
    "grouped2 = df_obj['data1'].groupby(df_obj['key1'])\n",
    "print(grouped2.mean())\n",
    "print('-'*50)\n",
    "\n",
    "# 提取出data1、data2列作为一个DataFrame，对这个DataFrame进行a、b分组，再计算data1、data2在a、b组的均值\n",
    "print(df_obj.loc[:,[\"data1\",\"data2\"]])\n",
    "print('-'*50)\n",
    "grouped3 = df_obj.loc[:,[\"data1\",\"data2\"]].groupby(df_obj[\"key1\"])\n",
    "print(grouped3.mean())\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T06:16:34.557764Z",
     "start_time": "2024-05-04T06:16:34.519631600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 9.1.3 按自定义的key分组（不重要，无需掌握）"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    1\n",
      "1    1\n",
      "2    1\n",
      "3    3\n",
      "5    1\n",
      "7    1\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000025ACAA33FD0>\n",
      "--------------------------------------------------\n",
      "  key1         key2     data1     data2\n",
      "0    a          one  1.331587  0.004291\n",
      "1    b          one  0.715279 -0.174600\n",
      "2    a          two -1.545400  0.433026\n",
      "3  bab  threetwotwo -0.107133  1.266246\n",
      "5    a          one  0.265512  0.228630\n",
      "7    a        three  0.108549  0.445138\n"
     ]
    }
   ],
   "source": [
    "# # 按自定义key分组，列表\n",
    "self_def_key = [0, 1, 2, 3, 3, 3, 5, 7]\n",
    "print(df_obj.groupby(self_def_key).size())  #等于mysql的count\n",
    "print(\"-\"*50)\n",
    "\n",
    "print(df_obj.groupby(self_def_key))\n",
    "print(\"-\"*50)\n",
    "\n",
    "print(df_obj.groupby(self_def_key).sum())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.551305800Z",
     "start_time": "2024-05-04T05:58:16.473526100Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 按自定义key分组，多层列表"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key1  key2 \n",
      "a     one      2\n",
      "      three    1\n",
      "      two      2\n",
      "b     one      1\n",
      "      three    1\n",
      "      two      1\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "key1  key2 \n",
      "a     one      2\n",
      "      three    1\n",
      "      two      2\n",
      "b     one      1\n",
      "      three    1\n",
      "      two      1\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(df_obj.groupby([df_obj['key1'], df_obj['key2']]).size())\n",
    "print('-'*50)\n",
    "\n",
    "# # 按多个列多层分组\n",
    "grouped2 = df_obj.groupby(['key1', 'key2'])\n",
    "print(grouped2.size())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.551305800Z",
     "start_time": "2024-05-04T05:58:16.489484100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key2   key1\n",
      "one    a       2\n",
      "       b       1\n",
      "three  a       1\n",
      "       b       1\n",
      "two    a       2\n",
      "       b       1\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.series.Series'>\n",
      "               data1     data2\n",
      "key2  key1                    \n",
      "one   a     0.798549  0.116461\n",
      "      b     0.715279 -0.174600\n",
      "three a     0.108549  0.445138\n",
      "      b    -0.008384  1.203037\n",
      "two   a    -0.462032 -0.266020\n",
      "      b    -0.720086  1.028274\n"
     ]
    }
   ],
   "source": [
    "# # 多层分组按key的顺序进行\n",
    "\n",
    "grouped3 = df_obj.groupby(['key2', 'key1'])\n",
    "print(grouped3.size()) #series,类似mysql的count\n",
    "print('-'*50)\n",
    "\n",
    "print(type(grouped3.size()))\n",
    "print(grouped3.mean())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.551305800Z",
     "start_time": "2024-05-04T05:58:16.517239Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MultiIndex([(  'one', 'a'),\n",
      "            (  'one', 'b'),\n",
      "            ('three', 'a'),\n",
      "            ('three', 'b'),\n",
      "            (  'two', 'a'),\n",
      "            (  'two', 'b')],\n",
      "           names=['key2', 'key1'])\n"
     ]
    }
   ],
   "source": [
    "print(grouped3.mean().index)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.552303600Z",
     "start_time": "2024-05-04T05:58:16.522531400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          data1               data2          \n",
      "key1          a         b         a         b\n",
      "key2                                         \n",
      "one    0.798549  0.715279  0.116461 -0.174600\n",
      "three  0.108549 -0.008384  0.445138  1.203037\n",
      "two   -0.462032 -0.720086 -0.266020  1.028274\n",
      "MultiIndex([('data1', 'a'),\n",
      "            ('data1', 'b'),\n",
      "            ('data2', 'a'),\n",
      "            ('data2', 'b')],\n",
      "           names=[None, 'key1'])\n"
     ]
    }
   ],
   "source": [
    "# unstack可以将多层索引的结果转换成单层的dataframe\n",
    "print(grouped3.mean().unstack())  #竖变横,只能变为列索引的最大的索引\n",
    "print(grouped3.mean().unstack().columns)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.690820200Z",
     "start_time": "2024-05-04T05:58:16.548552100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('one', 'a')\n",
      "  key1 key2     data1     data2\n",
      "0    a  one  1.331587  0.004291\n",
      "6    a  one  0.265512  0.228630\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "('one', 'b')\n",
      "  key1 key2     data1   data2\n",
      "1    b  one  0.715279 -0.1746\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "('three', 'a')\n",
      "  key1   key2     data1     data2\n",
      "7    a  three  0.108549  0.445138\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "('three', 'b')\n",
      "  key1   key2     data1     data2\n",
      "3    b  three -0.008384  1.203037\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "('two', 'a')\n",
      "  key1 key2     data1     data2\n",
      "2    a  two -1.545400  0.433026\n",
      "4    a  two  0.621336 -0.965066\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "('two', 'b')\n",
      "  key1 key2     data1     data2\n",
      "5    b  two -0.720086  1.028274\n",
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "# # 多层分组，根据key1 和 key2，做个性化处理\n",
    "for group_name, group_data in grouped3:\n",
    "    print(group_name)\n",
    "    print(group_data)\n",
    "    print(type(group_data))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.697370200Z",
     "start_time": "2024-05-04T05:58:16.553300800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "data": {
      "text/plain": "[(('one', 'a'),\n    key1 key2     data1     data2\n  0    a  one  1.331587  0.004291\n  6    a  one  0.265512  0.228630),\n (('one', 'b'),\n    key1 key2     data1   data2\n  1    b  one  0.715279 -0.1746),\n (('three', 'a'),\n    key1   key2     data1     data2\n  7    a  three  0.108549  0.445138),\n (('three', 'b'),\n    key1   key2     data1     data2\n  3    b  three -0.008384  1.203037),\n (('two', 'a'),\n    key1 key2     data1     data2\n  2    a  two -1.545400  0.433026\n  4    a  two  0.621336 -0.965066),\n (('two', 'b'),\n    key1 key2     data1     data2\n  5    b  two -0.720086  1.028274)]"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(grouped3)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.698336300Z",
     "start_time": "2024-05-04T05:58:16.583220300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 9.3 按类型分组(不重要，自行查看)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key1      object\n",
      "key2      object\n",
      "data1    float64\n",
      "data2    float64\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(df_obj.dtypes)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.698336300Z",
     "start_time": "2024-05-04T05:58:16.602263Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key1   key2     data1     data2     data3\n",
      "0    a    one  1.331587  0.004291  1.004291\n",
      "1    b    one  0.715279 -0.174600  0.825400\n",
      "2    a    two -1.545400  0.433026  1.433026\n",
      "3    b  three -0.008384  1.203037  2.203037\n",
      "4    a    two  0.621336 -0.965066  0.034934\n",
      "5    b    two -0.720086  1.028274  2.028274\n",
      "6    a    one  0.265512  0.228630  1.228630\n",
      "7    a  three  0.108549  0.445138  1.445138\n",
      "float64    3\n",
      "object     2\n",
      "dtype: int64\n",
      "    float64  object\n",
      "0  2.340169    aone\n",
      "1  1.366079    bone\n",
      "2  0.320652    atwo\n",
      "3  3.397691  bthree\n",
      "4 -0.308795    atwo\n",
      "5  2.336463    btwo\n",
      "6  1.722772    aone\n",
      "7  1.998824  athree\n"
     ]
    }
   ],
   "source": [
    "#一列数据类型必须一致，因为现实生活中一个特征类型是一致的\n",
    "df_obj['data3']=df_obj['data2']+1\n",
    "print(df_obj)\n",
    "print(df_obj.groupby(df_obj.dtypes, axis=1).size())#按轴0没有意义\n",
    "print(df_obj.groupby(df_obj.dtypes, axis=1).sum())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.698336300Z",
     "start_time": "2024-05-04T05:58:16.624205400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   1      2         3         4\n",
      "0  a    one  1.335878  1.004291\n",
      "1  b    one  0.540679  0.825400\n",
      "2  a    two -1.112374  1.433026\n",
      "3  b  three  1.194654  2.203037\n",
      "4  a    two -0.343730  0.034934\n",
      "5  b    two  0.308189  2.028274\n",
      "6  a    one  0.494142  1.228630\n",
      "7  a  three  0.553686  1.445138\n"
     ]
    }
   ],
   "source": [
    "#这里是自定义的列索引，用的不错\n",
    "self_column_key=[1,2,3,3,4]\n",
    "print(df_obj.groupby(self_column_key, axis=1).sum())   #通过自定义索引去让不同列相加"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.699361100Z",
     "start_time": "2024-05-04T05:58:16.637170500Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 2. 通过字典分组(不重要，自行查看)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   a    b    c    d  e\n",
      "A  4  7.0  2.0  5.0  3\n",
      "B  7  NaN  NaN  NaN  3\n",
      "C  1  7.0  8.0  9.0  2\n",
      "D  8  2.0  5.0  1.0  9\n",
      "E  6  5.0  8.0  9.0  9\n",
      "C         1\n",
      "Java      2\n",
      "Python    2\n",
      "dtype: int64\n",
      "   C  Java  Python\n",
      "A  1     2       2\n",
      "B  0     1       1\n",
      "C  1     2       2\n",
      "D  1     2       2\n",
      "E  1     2       2\n",
      "     C  Java  Python\n",
      "A  5.0   5.0    11.0\n",
      "B  0.0   3.0     7.0\n",
      "C  9.0  10.0     8.0\n",
      "D  1.0  14.0    10.0\n",
      "E  9.0  17.0    11.0\n"
     ]
    }
   ],
   "source": [
    "df_obj2 = pd.DataFrame(np.random.randint(1, 10, (5,5)),\n",
    "                       columns=['a', 'b', 'c', 'd', 'e'],\n",
    "                       index=['A', 'B', 'C', 'D', 'E'])\n",
    "# 给指定某个部分的数据重新赋值为 np.NaN\n",
    "df_obj2.loc['B','b':'d']=np.NAN\n",
    "\n",
    "print(df_obj2)\n",
    "# 通过字典分组\n",
    "mapping_dict = {'a':'Python', 'b':'Python', 'c':'Java', 'd':'C', 'e':'Java'}\n",
    "print(df_obj2.groupby(mapping_dict, axis=1).size())\n",
    "print(df_obj2.groupby(mapping_dict, axis=1).count()) # df内部值非NaN的个数\n",
    "print(df_obj2.groupby(mapping_dict, axis=1).sum())  #求和不记录nan,nan当为0\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.699361100Z",
     "start_time": "2024-05-04T05:58:16.663100100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "outputs": [],
   "source": [
    "str1='熊大'"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.699361100Z",
     "start_time": "2024-05-04T05:58:16.681756500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "data": {
      "text/plain": "'熊'"
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "str1[0]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.832897100Z",
     "start_time": "2024-05-04T05:58:16.695344600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      a  b  c  d  ee\n",
      "AA    3  7  3  9   9\n",
      "BBBB  7  7  6  7   1\n",
      "CC    1  7  2  9   2\n",
      "D     3  9  6  1   3\n",
      "EE    8  4  1  5   3\n",
      "1    1\n",
      "2    3\n",
      "4    1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": "1    1\n2    3\n4    1\ndtype: int64"
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 通过函数分组\n",
    "df_obj3 = pd.DataFrame(np.random.randint(1, 10, (5,5)),\n",
    "                       columns=['a', 'b', 'c', 'd', 'ee'],\n",
    "                       index=['AA', 'BBBB', 'CC', 'D', 'EE'])\n",
    "#df_obj3\n",
    "\n",
    "def group_key(idx):\n",
    "    \"\"\"\n",
    "        idx 为列索引或行索引，默认为行索引 axis=0,传入行索引，轴为1，传入列索引\n",
    "    \"\"\"\n",
    "    #return idx\n",
    "    return len(idx)\n",
    "print(df_obj3)\n",
    "print(df_obj3.groupby(group_key,axis=0).size())\n",
    "\n",
    "# 以上自定义函数等价于\n",
    "df_obj3.groupby(len).size()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.856638Z",
     "start_time": "2024-05-04T05:58:16.719281Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "language Python Java Python Java Python\n",
      "index1        A    A      B    C      B\n",
      "0             1    4      4    2      3\n",
      "1             6    1      2    1      2\n",
      "2             1    3      2    2      1\n",
      "3             1    6      1    5      7\n",
      "4             7    1      3    4      4\n",
      "language  Java  Python\n",
      "0            6       8\n",
      "1            2      10\n",
      "2            5       4\n",
      "3           11       9\n",
      "4            5      14\n",
      "index1  A  B  C\n",
      "0       5  7  2\n",
      "1       7  4  1\n",
      "2       4  3  2\n",
      "3       7  8  5\n",
      "4       8  7  4\n"
     ]
    }
   ],
   "source": [
    "# 通过索引级别分组，可以指定某个级别进行分组\n",
    "columns = pd.MultiIndex.from_arrays([['Python', 'Java', 'Python', 'Java', 'Python'],\n",
    "                                     ['A', 'A', 'B', 'C', 'B']],\n",
    "                                    names=['language', 'index1'])\n",
    "df_obj4 = pd.DataFrame(np.random.randint(1, 10, (5, 5)), columns=columns)\n",
    "print(df_obj4)\n",
    "\n",
    "# 根据language进行分组\n",
    "print(df_obj4.groupby(level='language', axis=1).sum())\n",
    "# 根据index进行分组\n",
    "print(df_obj4.groupby(level='index1', axis=1).sum())\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.857614600Z",
     "start_time": "2024-05-04T05:58:16.738438500Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 9.4 聚合\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key1  data1  data2\n",
      "0    a      5      7\n",
      "1    b      1      5\n",
      "2    a      2      4\n",
      "3    b      1      1\n",
      "4    a      2      5\n",
      "5    b      9      7\n",
      "6    a      1      9\n",
      "7    a      9      2\n",
      "--------------------------------------------------\n",
      "a\n",
      "  key1  data1  data2\n",
      "0    a      5      7\n",
      "2    a      2      4\n",
      "4    a      2      5\n",
      "6    a      1      9\n",
      "7    a      9      2\n",
      "--------------------------------------------------\n",
      "b\n",
      "  key1  data1  data2\n",
      "1    b      1      5\n",
      "3    b      1      1\n",
      "5    b      9      7\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "np.random.seed(10)\n",
    "dict_obj = {'key1' : ['a', 'b', 'a', 'b',\n",
    "                      'a', 'b', 'a', 'a'],\n",
    "            # 'key2' : ['one', 'one', 'two', 'three',\n",
    "            #           'two', 'two', 'one', 'three'],\n",
    "            'data1': np.random.randint(1,10, 8),\n",
    "            'data2': np.random.randint(1,10, 8)}\n",
    "df_obj=pd.DataFrame(dict_obj)\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "\n",
    "for i in df_obj.groupby('key1'):\n",
    "    print(i[0])\n",
    "    print(i[1])\n",
    "    print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.857614600Z",
     "start_time": "2024-05-04T05:58:16.760173200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      data1  data2\n",
      "key1              \n",
      "a        19     27\n",
      "b        11     13\n",
      "--------------------------------------------------\n",
      "      data1  data2\n",
      "key1              \n",
      "a         9      9\n",
      "b         9      7\n"
     ]
    }
   ],
   "source": [
    "print(df_obj.loc[:,['key1','data1','data2']].groupby(\"key1\").sum())\n",
    "#print(df_obj.groupby(\"key1\").sum())    # 最好不这么写，因为可能有的列的数据不能计算，把要计算得咧提取出来\n",
    "print('-'*50)\n",
    "\n",
    "print(df_obj.loc[:,['key1','data1','data2']].groupby(\"key1\").max())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.858603500Z",
     "start_time": "2024-05-04T05:58:16.783110100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      data1  data2\n",
      "key1              \n",
      "a         1      2\n",
      "b         1      1\n",
      "--------------------------------------------------\n",
      "         data1     data2\n",
      "key1                    \n",
      "a     3.800000  5.400000\n",
      "b     3.666667  4.333333\n",
      "--------------------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": "     data1                                              data2            \\\n     count      mean       std  min  25%  50%  75%  max count      mean   \nkey1                                                                      \na      5.0  3.800000  3.271085  1.0  2.0  2.0  5.0  9.0   5.0  5.400000   \nb      3.0  3.666667  4.618802  1.0  1.0  1.0  5.0  9.0   3.0  4.333333   \n\n                                         \n           std  min  25%  50%  75%  max  \nkey1                                     \na     2.701851  2.0  4.0  5.0  7.0  9.0  \nb     3.055050  1.0  3.0  5.0  6.0  7.0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead tr th {\n        text-align: left;\n    }\n\n    .dataframe thead tr:last-of-type th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr>\n      <th></th>\n      <th colspan=\"8\" halign=\"left\">data1</th>\n      <th colspan=\"8\" halign=\"left\">data2</th>\n    </tr>\n    <tr>\n      <th></th>\n      <th>count</th>\n      <th>mean</th>\n      <th>std</th>\n      <th>min</th>\n      <th>25%</th>\n      <th>50%</th>\n      <th>75%</th>\n      <th>max</th>\n      <th>count</th>\n      <th>mean</th>\n      <th>std</th>\n      <th>min</th>\n      <th>25%</th>\n      <th>50%</th>\n      <th>75%</th>\n      <th>max</th>\n    </tr>\n    <tr>\n      <th>key1</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>5.0</td>\n      <td>3.800000</td>\n      <td>3.271085</td>\n      <td>1.0</td>\n      <td>2.0</td>\n      <td>2.0</td>\n      <td>5.0</td>\n      <td>9.0</td>\n      <td>5.0</td>\n      <td>5.400000</td>\n      <td>2.701851</td>\n      <td>2.0</td>\n      <td>4.0</td>\n      <td>5.0</td>\n      <td>7.0</td>\n      <td>9.0</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>3.0</td>\n      <td>3.666667</td>\n      <td>4.618802</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>5.0</td>\n      <td>9.0</td>\n      <td>3.0</td>\n      <td>4.333333</td>\n      <td>3.055050</td>\n      <td>1.0</td>\n      <td>3.0</td>\n      <td>5.0</td>\n      <td>6.0</td>\n      <td>7.0</td>\n    
</tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df_obj.loc[:,['key1','data1','data2']].groupby(\"key1\").min())\n",
    "print('-'*50)\n",
    "\n",
    "#平均值\n",
    "print(df_obj.loc[:,['key1','data1','data2']].groupby(\"key1\").mean())\n",
    "print('-'*50)\n",
    "\n",
    "df_obj.groupby(\"key1\").describe()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.859600500Z",
     "start_time": "2024-05-04T05:58:16.801061600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 自定义聚合函数"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key1  data1  data2\n",
      "0    a      5      7\n",
      "1    b      1      5\n",
      "2    a      2      4\n",
      "3    b      1      1\n",
      "4    a      2      5\n",
      "5    b      9      7\n",
      "6    a      1      9\n",
      "7    a      9      2\n",
      "--------------------------------------------------\n",
      "      data1  data2\n",
      "key1              \n",
      "a         8      7\n",
      "b         8      6\n",
      "--------------------------------------------------\n",
      "      data1  data2\n",
      "key1              \n",
      "a         8      7\n",
      "b         8      6\n"
     ]
    }
   ],
   "source": [
    "df_obj5 = pd.DataFrame(dict_obj)\n",
    "print(df_obj5)\n",
    "print('-' * 50)\n",
    "\n",
    "def peak_range(df):\n",
    "    \"\"\"Return the spread (maximum minus minimum) of the values in ``df``.\n",
    "\n",
    "    Used as a custom aggregation for ``groupby(...).agg``. Despite the\n",
    "    parameter name, pandas presumably calls this once per column of each\n",
    "    group (a Series) rather than with the whole group frame -- verify with\n",
    "    ``type(df)`` if it matters; anything exposing ``max()``/``min()`` works.\n",
    "    \"\"\"\n",
    "    highest = df.max()\n",
    "    lowest = df.min()\n",
    "    return highest - lowest\n",
    "\n",
    "# Spread of data1/data2 within each key1 group, using the named function.\n",
    "print(df_obj5[['key1', 'data1', 'data2']].groupby('key1').agg(peak_range))\n",
    "print('-' * 50)\n",
    "\n",
    "# The same aggregation written as an anonymous function.\n",
    "print(df_obj5[['key1', 'data1', 'data2']].groupby('key1').agg(lambda df: df.max() - df.min()))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T06:19:54.553930500Z",
     "start_time": "2024-05-04T06:19:54.515874Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         data1                                data2                           \n",
      "          mean       std count peak_range      mean       std count peak_range\n",
      "key1                                                                          \n",
      "a     3.800000  3.271085     5          8  5.400000  2.701851     5          7\n",
      "b     3.666667  4.618802     3          8  4.333333  3.055050     3          6\n"
     ]
    }
   ],
   "source": [
    "# Apply several aggregations at once: each function yields one sub-column per data column.\n",
    "# The numeric columns are selected explicitly, as the neighbouring cells do, so that a\n",
    "# non-numeric column in df_obj cannot break 'mean'/'std'/peak_range.\n",
    "print(df_obj[['key1', 'data1', 'data2']].groupby('key1').agg(['mean', 'std', 'count', peak_range]))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.992712900Z",
     "start_time": "2024-05-04T05:58:16.876731100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         data1  data2\n",
      "key1                 \n",
      "a     3.800000      5\n",
      "b     3.666667      3\n"
     ]
    }
   ],
   "source": [
    "# A different aggregation per column: the mapping goes from column name to function name.\n",
    "dict_mapping = {'data1': 'mean', 'data2': 'count'}\n",
    "print(df_obj.groupby('key1').agg(dict_mapping))\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:16.993711600Z",
     "start_time": "2024-05-04T05:58:16.884710100Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 星巴克全球开店"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       Brand  Store Number     Store Name Ownership Type     Street Address  \\\n",
      "0  Starbucks  47370-257954  Meritxell, 96       Licensed  Av. Meritxell, 96   \n",
      "\n",
      "               City State/Province Country Postcode Phone Number  \\\n",
      "0  Andorra la Vella              7      AD    AD500    376818720   \n",
      "\n",
      "                  Timezone  Longitude  Latitude  \n",
      "0  GMT+1:00 Europe/Andorra       1.53     42.51  \n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 25600 entries, 0 to 25599\n",
      "Data columns (total 13 columns):\n",
      " #   Column          Non-Null Count  Dtype  \n",
      "---  ------          --------------  -----  \n",
      " 0   Brand           25600 non-null  object \n",
      " 1   Store Number    25600 non-null  object \n",
      " 2   Store Name      25600 non-null  object \n",
      " 3   Ownership Type  25600 non-null  object \n",
      " 4   Street Address  25598 non-null  object \n",
      " 5   City            25585 non-null  object \n",
      " 6   State/Province  25600 non-null  object \n",
      " 7   Country         25600 non-null  object \n",
      " 8   Postcode        24078 non-null  object \n",
      " 9   Phone Number    18739 non-null  object \n",
      " 10  Timezone        25600 non-null  object \n",
      " 11  Longitude       25599 non-null  float64\n",
      " 12  Latitude        25599 non-null  float64\n",
      "dtypes: float64(2), object(11)\n",
      "memory usage: 2.5+ MB\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "file_path = \"./starbucks_store_worldwide.csv\"\n",
    "\n",
    "df = pd.read_csv(file_path)\n",
    "# Peek at the first row to see what each column holds.\n",
    "print(df.head(1))\n",
    "print(\"-\"*50)\n",
    "\n",
    "# Overview of columns, non-null counts and dtypes. info() writes its report to stdout\n",
    "# itself and returns None, so it is called directly; wrapping it in print() would\n",
    "# append a stray \"None\" line to the output.\n",
    "df.info()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:17.193253600Z",
     "start_time": "2024-05-04T05:58:16.899670400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Country\n",
      "AD        1\n",
      "AE      144\n",
      "AR      108\n",
      "AT       18\n",
      "AU       22\n",
      "      ...  \n",
      "TT        3\n",
      "TW      394\n",
      "US    13608\n",
      "VN       25\n",
      "ZA        3\n",
      "Length: 73, dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": "array([    1,   144,   108,    18,    22,     3,     4,    19,     5,\n          21,     5,     4,   102,    10,  1468,    61,    96,  2734,\n          11,    11,     3,    10,    28,   160,    21,    31,   101,\n           8,   132,   901,    28,     7,    16,   268,    73,    88,\n          17,  1237,     4,   993,   106,     8,    29,     2,     9,\n           2,   579,   234,    59,    17,    24,    12,     5,    89,\n         298,    53,    24,    11,    18,    27,   109,   102,    18,\n         130,     3,    11,   289,   326,     3,   394, 13608,    25,\n           3], dtype=int64)"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of stores in each country: group the rows by country code and take the group sizes.\n",
    "grouped = df.groupby('Country')\n",
    "print(grouped.size())\n",
    "print('-' * 50)\n",
    "\n",
    "# The same counts as a bare NumPy array, shown as the cell's output.\n",
    "grouped.size().values"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:17.194251900Z",
     "start_time": "2024-05-04T05:58:17.041240800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Country\n",
      "AD        1\n",
      "AE      144\n",
      "AR      108\n",
      "AT       18\n",
      "AU       22\n",
      "      ...  \n",
      "TT        3\n",
      "TW      394\n",
      "US    13608\n",
      "VN       25\n",
      "ZA        3\n",
      "Name: Brand, Length: 73, dtype: int64\n",
      "--------------------------------------------------\n",
      "13608\n",
      "2734\n"
     ]
    }
   ],
   "source": [
    "# count() tallies non-null values, so any column without missing entries gives the\n",
    "# per-country store count; Brand is used here (the Country column itself would also do).\n",
    "country_count = grouped['Brand'].count()\n",
    "print(country_count)\n",
    "print('-' * 50)\n",
    "\n",
    "# Look up individual countries by their country code.\n",
    "print(country_count.loc['US'])\n",
    "print(country_count.loc['CN'])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:17.195248300Z",
     "start_time": "2024-05-04T05:58:17.062883800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Brand  Store Number  Store Name Ownership Type  \\\n",
      "2091  Starbucks  22901-225145   北京西站第一咖啡店  Company Owned   \n",
      "2092  Starbucks  32320-116537     北京华宇时尚店  Company Owned   \n",
      "2093  Starbucks  32447-132306  北京蓝色港湾圣拉娜店  Company Owned   \n",
      "2094  Starbucks  17477-161286  北京太阳宫凯德嘉茂店  Company Owned   \n",
      "2095  Starbucks  24520-237564     北京东三环北店  Company Owned   \n",
      "\n",
      "                                 Street Address City State/Province Country  \\\n",
      "2091                  丰台区, 北京西站通廊7-1号, 中关村南大街2号  北京市             11      CN   \n",
      "2092  海淀区, 数码大厦B座华宇时尚购物中心内, 蓝色港湾国际商区1座C1-3单元首层、  北京市             11      CN   \n",
      "2093        朝阳区朝阳公园路6号, 二层C1-3单元及二层阳台, 太阳宫中路12号  北京市             11      CN   \n",
      "2094          朝阳区, 太阳宫凯德嘉茂一层01-44/45号, 东三环北路27号  北京市             11      CN   \n",
      "2095              朝阳区, 嘉铭中心大厦A座B1层024商铺, 金融大街7号  北京市             11      CN   \n",
      "\n",
      "     Postcode  Phone Number                Timezone  Longitude  Latitude  \n",
      "2091   100073           NaN  GMT+08:00 Asia/Beijing     116.32     39.90  \n",
      "2092   100086  010-51626616  GMT+08:00 Asia/Beijing     116.32     39.97  \n",
      "2093   100020  010-59056343  GMT+08:00 Asia/Beijing     116.47     39.95  \n",
      "2094   100028  010-84150945  GMT+08:00 Asia/Beijing     116.45     39.97  \n",
      "2095      NaN           NaN  GMT+08:00 Asia/Beijing     116.46     39.93  \n",
      "--------------------------------------------------\n",
      "State/Province\n",
      "11    236\n",
      "12     58\n",
      "13     24\n",
      "14      8\n",
      "15      8\n",
      "21     57\n",
      "22     13\n",
      "23     16\n",
      "31    551\n",
      "32    354\n",
      "33    315\n",
      "34     26\n",
      "35     75\n",
      "36     13\n",
      "37     75\n",
      "41     21\n",
      "42     76\n",
      "43     35\n",
      "44    333\n",
      "45     21\n",
      "46     16\n",
      "50     41\n",
      "51    104\n",
      "52      9\n",
      "53     24\n",
      "61     42\n",
      "62      3\n",
      "63      3\n",
      "64      2\n",
      "91    162\n",
      "92     13\n",
      "Name: Brand, dtype: int64\n",
      "--------------------------------------------------\n",
      "State/Province\n",
      "31    551\n",
      "32    354\n",
      "44    333\n",
      "33    315\n",
      "11    236\n",
      "91    162\n",
      "51    104\n",
      "42     76\n",
      "35     75\n",
      "37     75\n",
      "12     58\n",
      "21     57\n",
      "61     42\n",
      "50     41\n",
      "43     35\n",
      "34     26\n",
      "53     24\n",
      "13     24\n",
      "45     21\n",
      "41     21\n",
      "46     16\n",
      "23     16\n",
      "36     13\n",
      "22     13\n",
      "92     13\n",
      "52      9\n",
      "15      8\n",
      "14      8\n",
      "62      3\n",
      "63      3\n",
      "64      2\n",
      "Name: Brand, dtype: int64\n",
      "--------------------------------------------------\n",
      "<class 'pandas.core.series.Series'>\n",
      "Index(['31', '32', '44', '33', '11', '91', '51', '42', '35', '37', '12', '21',\n",
      "       '61', '50', '43', '34', '53', '13', '45', '41', '46', '23', '36', '22',\n",
      "       '92', '52', '15', '14', '62', '63', '64'],\n",
      "      dtype='object', name='State/Province')\n"
     ]
    },
    {
     "data": {
      "text/plain": "<BarContainer object of 31 artists>"
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 1600x800 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABQIAAAKJCAYAAADgCVRUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAAxOAAAMTgF/d4wjAAA4aElEQVR4nO3de5zVdYH/8feBGQUURUJAf7RbW1mrm26XB4llomyJCOIlTC1dNy21dTW8pKZ42UzTJPOWj9x2qe3y8BIVi6FlXnqUN/7YSksf6dpaayqNXJQRgRnm/P7oIQ+FGTgOzjlf+Dyffy0zc/q+59x97TlzavV6vR4AAAAAYIs2qNUDAAAAAICBJwQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABSgrdUDXqler6enp97qGZU0aFCtUudN1fYk1dtUtT2JTY2o2p6kepuqtiexqRFV25NUb1PV9iQ2NaJqe5LqbaransSmRlRtT1K9TVXbk9jUiKrtSaq3qWp7Eps2N4MH9/26v0qFwJ6eepYsebHVMypp5MhtKnXeVG1PUr1NVduT2NSIqu1JqrepansSmxpRtT1J9TZVbU9iUyOqtiep3qaq7UlsakTV9iTV21S1PYlNjajanqR6m6q2J7Fpc7PjjsP7/J63BgMAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABWhr9YDS1GrNPW293v/jAQAAALDlEAKbaPiIYRnSPrjfpx81avhrPs3KrjVZvmxFv48JAAAAwJZBCGySWi0Z0j44e15yZzpXdTflmNtu3ZYHPjcpnTWvDAQAAAAonRDYZJ2rupsWAgEAAADgZT4sBAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUIC2Rn7otNNOy29/+9sMGTIkSXLyySfnr//6r3PeeeflhRdeyC677JIvfvGLGTZsWDo7O3PmmWfmD3/4Q4YOHZrZs2fnTW9600D+DgAAAADARjQUAn/zm9/k5ptvzogRI9Z+bfr06Tn33HMzfvz4XHXVVbn++utz+umn56qrrspuu+2W66+/Pvfff3/OPvvs3HjjjQO1HwAAAABowEbfGrx06dIsWbIkn/3sZzNt2rRce+21eeaZZ7J8+fKMHz8+STJjxowsWLAgSXL33XfnkEMOSZJMmDAhHR0defrppwfwVwAAAAAANmajrwhcvHhx9tprr1x00UXZeuutc+KJJ6atrS1jxoxZ+zOjR4/OokWLkiSLFi1a73vPPvtsdt55542OGTSolpEjt+nP78EG7LDD
63+eVvGyqtqmqu1JbGpE1fYk1dtUtT2JTY2o2p6kepuqtiexqRFV25NUb1PV9iQ2NaJqe5LqbaransSmRlRtT1K9TVXbk9i0JdloCHzrW9+aq6++eu2/jz766HzrW99a7+dqtVqSpF6vr/e9QYMa+0ySnp56lix5saGf3dzUasmoUcNbcuylS19MLxfLJhk5cpvKXVZV21S1PYlNjajanqR6m6q2J7GpEVXbk1RvU9X2JDY1omp7kuptqtqexKZGVG1PUr1NVduT2NSIqu1JqrepansSmzY3O+7Yd3/aaKF7+OGHc9ddd639d09PT5Kko6Nj7dc6OjoyduzYJMmYMWP6/B4AAAAA0BobDYFdXV255JJL0tnZmdWrV+fGG2/M4YcfnqFDh2bhwoVJkrlz52afffZJkkycODFz585Nkjz44IMZNmyYEAgAAAAALbbRtwa/+93vzlFHHZUZM2ZkzZo12X///TN16tTssssumTVrVpYvX55x48Zl9uzZSZJTTz015557bqZOnZqtttoql19++YD/EgAAAADAhm00BCbJJz7xiXziE5941dd22WWX3HTTTev97HbbbZdrrrnm9VkHAAAAALwuGvsUDwAAAABgsyYEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACvCaQuBll12Ws88+O0ny2GOP5fDDD8/kyZNzyimnZMWKFUmSzs7OnHTSSZkyZUoOO+ywPPnkk6/7aAAAAADgtWk4BN5///35wQ9+sPbfZ555Zs4444zcfvvtectb3pLrr78+SXLVVVdlt912y4IFC3LGGWesDYcAAAAAQOs0FAKXLVuWK6+8MieeeGKS5Jlnnsny5cszfvz4JMmMGTOyYMGCJMndd9+dQw45JEkyYcKEdHR05Omnnx6I7QAAAABAg9oa+aHzzz8/M2fOzDPPPJMkWbRoUcaMGbP2+6NHj86iRYv6/N6zzz6bnXfeeaPHGTSolpEjt3lNvwAbt8MOr/95WsXLqmqbqrYnsakRVduTVG9T1fYkNjWianuS6m2q2p7EpkZUbU9SvU1V25PY
1Iiq7Umqt6lqexKbGlG1PUn1NlVtT2LTlmSjIfCWW27JTjvtlAkTJuT73/9+kqSnp2e9n6vVakmSer2+3vcGDWrsHcg9PfUsWfJiQz+7uanVklGjhrfk2EuXvpheLpZNMnLkNpW7rKq2qWp7EpsaUbU9SfU2VW1PYlMjqrYnqd6mqu1JbGpE1fYk1dtUtT2JTY2o2p6kepuqtiexqRFV25NUb1PV9iQ2bW523LHv/rTRELhgwYJ0dHRk+vTpef7557NixYrUarV0dHSs/ZmOjo6MHTs2STJmzJh0dHRkp512Wu97AAAAAEBrbPSlenPmzMmtt96aefPm5ZRTTsl+++2XSy+9NEOHDs3ChQuTJHPnzs0+++yTJJk4cWLmzp2bJHnwwQczbNgwIRAAAAAAWqyhvxHYm9mzZ2fWrFlZvnx5xo0bl9mzZydJTj311Jx77rmZOnVqttpqq1x++eWv21gAAAAAoH9eUwg89NBDc+ihhyZJdtlll9x0003r/cx2222Xa6655vVZBwAAAAC8Lhr7FA8AAAAAYLMmBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEAB2lo9gNaq1Zp/2nq9/8cEAAAAoH+EwIINHzEsQ9oH9/v0o0YN79fpVnatyfJlK/p9XAAAAABeOyGwULVaMqR9cPa85M50rupu2nG33botD3xuUjprXhkIAAAA0ExCYOE6V3U3NQQCAAAA0Bo+LAQAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAoQFurB8C6arXmnrZe7//xAAAAADYXQiCVMnzEsAxpH9zv048aNfw1n2Zl15osX7ai38cEAAAA2BwIgVRGrZYMaR+cPS+5M52ruptyzG23bssDn5uUzppXBgIAAABbNiGQyulc1d20EAgAAABQCh8WAgAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAHaWj0ANge1WnNPV6/373QAAAAAfRECYSOGjxiWIe2D+3XaUaOG9+t0K7vWZPmyFf06LQAAAEBvhEDYgFotGdI+OHtecmc6V3U35Zjbbt2WBz43KZ01rwwEAAAAXj9CIDSgc1V300IgAAAAwEDwYSEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACtBwCLziiisyZcqUHHjggZkzZ06SZOHChZk+fXr233//XHDBBenu7k6SLFq0KEcffXQOOOCAHHPMMVm8ePHArAcAAAAAGtJQCLznnnvyq1/9KvPnz8/cuXPzrW99K7///e9z1lln5Stf+Upuv/32rFy5Mt/7
3veSJBdddFEOPfTQ3HbbbTnooIPyhS98YUB/CQAAAABgwxoKgRMnTsycOXMyePDgLF68OGvWrMmSJUsybty4vPnNb06tVstHPvKRLFiwIF1dXXnwwQczderUJMnBBx+ce+65J11dXQP6iwAAAAAAfWtr9Afb29tz5ZVX5hvf+EYOOOCAPPvssxkzZsza748ePTqLFi3KsmXLss0226S9vf0vB2hry7bbbpslS5a86ud7M2hQLSNHbtPPX4W+7LBD9c7Tqm2q2p5kYDZV8TZWtU1V25NUb1PV9iQ2NaJqe5LqbaransSmRlRtT1K9TVXbk9jUiKrtSaq3qWp7EpsaUbU9SfU2VW1PYtOWpOEQmCQzZ87MCSeckBNPPDFPPvnket+v1Wrp6enp9bSDBm38xYc9PfUsWfLia5m02ajVklGjhrfk2EuXvph6vTp7kupt6m1PUs1Nm2LkyG0qdxur2qaq7Umqt6lqexKbGlG1PUn1NlVtT2JTI6q2J6nepqrtSWxqRNX2JNXbVLU9iU2NqNqepHqbqrYnsWlzs+OOfXeMht4a/Pjjj+d3v/tdkmTYsGHZf//9s3DhwnR0dKz9mY6OjowdOzYjR45MZ2fn2g8O6e7uzosvvpgRI0Zswq8AAAAAAGyKhkLgE088kQsvvDBdXV1ZvXp17rjjjhxxxBF58skn88QTTyRJ5s6dm4kTJ6a9vT3jx4/P/PnzkyTz58/P+PHj175VGAAAAABovobeGjx58uQ88sgjmT59egYPHpzJkydnypQpGTVqVE4//fSsXLkyu+++e4466qgkyQUXXJBzzjknX//617P99tvniiuuGNBfAgAAAADYsIb/RuBpp52W00477VVfGz9+fH74wx+u97M77bRTvvGNb2zqNgAAAADgddLQW4MBAAAAgM2bEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAArS1egDw2tVqzT9tvd7/YwIAAACtJwTCZmb4iGEZ0j6436cfNWp4v063smtNli9b0e/jAgAAAK0lBMJmpFZLhrQPzp6X3JnOVd1NO+62W7flgc9NSmfNKwMBAABgcyUEwmaoc1V3U0MgAAAAsPnzYSEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACA
AjQUAufMmZOpU6dm6tSpOeecc7J69eo89thjOfzwwzN58uSccsopWbFiRZKks7MzJ510UqZMmZLDDjssTz755EDuBwAAAAAasNEQ+NBDD+X73/9+br755syfPz/d3d357ne/mzPPPDNnnHFGbr/99rzlLW/J9ddfnyS56qqrsttuu2XBggU544wzcvbZZw/4LwEAAAAAbNhGQ+B2222XWbNmZdiwYanVannHO96R3/3ud1m+fHnGjx+fJJkxY0YWLFiQJLn77rtzyCGHJEkmTJiQjo6OPP300wP4KwAAAAAAG9O2sR9405velDe96U1JksWLF+c73/lOjjjiiFe95Xf06NFZtGhRkmTRokUZM2bMq7737LPPZuedd97omEGDahk5cpvX+CuwMTvsUL3ztGqbqrYnKWdT1W73VduTVG9T1fYkNjWianuS6m2q2p7EpkZUbU9SvU1V25PY1Iiq7Umqt6lqexKbGlG1PUn1NlVtT2LTlmSjIfBlTz31VE444YTMmDEj733ve3P33Xe/6vu1Wi1JUq/X1zvtoEGNfSZJT089S5a82OikzUqtlowaNbwlx1669MWse7G0ck9SvU297Umqt6mKl9umGjlym0rd7qu2J6nepqrtSWxqRNX2JNXbVLU9iU2NqNqepHqbqrYnsakRVduTVG9T1fYkNjWianuS6m2q2p7Eps3Njjv23Q0aKnSPPvpojjzyyBxxxBE56aSTMnbs2HR0dKz9fkdHR8aOHZskGTNmTJ/fAwAAAABaY6MhcMmSJTn++OMza9asHH300UmSnXfeOUOHDs3ChQuTJHPnzs0+++yTJJk4cWLmzp2bJHnwwQczbNgwIRAAAAAAWmyjbw3+5je/mc7Ozlx33XW57rrrkvwl9s2ePTuzZs3K8uXLM27cuMyePTtJcuqpp+bcc8/N1KlTs9VWW+Xyyy8f2N8AAAAAANiojYbAmTNnZubMmb1+76abblrva9ttt12uueaaTV8GAAAAALxuGvsUDwAAAABgsyYEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAdpaPQDYMtRqzT1tvd7/4wEAAECJhEBgkw0fMSxD2gf3+/SjRg1/zadZ2bUmy5et6PcxAQAAoDRCILBJarVkSPvg7HnJnelc1d2UY267dVse+NykdNa8MhAAAAAaJQQCr4vOVd1NC4EAAADAa+fDQgAAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABCIAAAAAAUQAgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACg
AEIgAAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACiAEAgAAAEABhEAAAAAAKIAQCAAAAAAFEAIBAAAAoABtrR4AMBBqteaftl7v/zEBAABgoAmBwBZn+IhhGdI+uN+nHzVqeL9Ot7JrTZYvW9Hv4wIAAMBAEgKBLUqtlgxpH5w9L7kznau6m3bcbbduywOfm5TOmlcGAgAAUE0Nh8DOzs4ceeSRuf766zNu3LgsXLgwX/jCF7Jy5crsueeemTVrVtra2rJo0aKcccYZee6557LjjjvmyiuvzBve8IaB/B0A1tO5qrupIRAAAACqrqEPC/nlL3+ZI488Mv/7v/+bJFm9enXOOuusfOUrX8ntt9+elStX5nvf+16S5KKLLsqhhx6a2267LQcddFC+8IUvDNx6AAAAAKAhDYXAm2++ORdccEFGjx6dJHnooYcybty4vPnNb06tVstHPvKRLFiwIF1dXXnwwQczderUJMnBBx+ce+65J11dXQP3GwAAAAAAG9XQW4MvvfTSV/170aJFGTNmzNp/jx49OosWLcqyZcuyzTbbpL29/S//421t2XbbbbNkyZJX/XxfBg2qZeTIbV7Lfhqwww7VO0+rtqlqexKbGlG1PcnAbKrafWPV9iQ2NaJqe5LqbaransSmRlRtT1K9TVXbk9jUiKrtSaq3qWp7EpsaUbU9SfU2VW1PYtOWpF8fFtLT07Pe12q1Wq9fT5JBgxp64WF6eupZsuTF/kyqvFqt/59EuqmWLn1xvQ8vaOWepHqbetuTVG+Ty63ae5K+r0ubYuTIbSp131i1PYlNjajanqR6m6q2J7GpEVXbk1RvU9X2JDY1omp7kuptqtqexKZGVG1PUr1NVduT2LS52XHHvv+buLFCt46xY8emo6Nj7b87OjoyduzYjBw5Mp2dnenu/ssf6O/u7s6LL76YESNG9OcwAAAAAMDrpF8hcI899siTTz6ZJ554Ikkyd+7cTJw4Me3t7Rk/fnzmz5+fJJk/f37Gjx+/9q3CAAAAAEBr9OutwVtttVW+9KUv5fTTT8/KlSuz++6756ijjkqSXHDBBTnnnHPy9a9/Pdtvv32uuOKK13UwAAAAAPDavaYQeNddd639v8ePH58f/vCH6/3MTjvtlG984xubugsAAAAAeB31663BAAAAAMDmRQgEAAAAgAIIgQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABRACAQAAACAArS1egBAKWq15p62Xu//8QAAANjyCIEATTB8xLAMaR/c79OPGjX8NZ9mZdeaLF+2YoM/09842d/TbSxONjuWJoIpAABQDiEQYIDVasmQ9sHZ85I707mquynH3HbrtjzwuUnprPUdujYlTvYnTCYbjpOtiKUb2wQAALAlEQIBmqRzVXfTQuDGVC1OtmLPxjYBAABsaYRAgIJVKU4m1dsDAACwJfGpwQAAAABQACEQAAAAAAogBAIAAABAAYRAAAAAACiAEAgAAAAABRACAQAAAKAAQiAAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAG2tHgAAVVarNfe09Xr/jwcAALAhQiAA9GH4iGEZ0j6436cfNWr4az7Nyq41Wb5sRb+PCQAA0BchEAB6UaslQ9oHZ89L7kznqu6mHHPbrdvywOcmpbPW9ysDm/0KxcSrFAEAYEshBALABnSu6m5aCNyYVrxCMfEqRQAA2FIIgQCwGWjFKxSTxl6lCAAAbB6EQADYjFTpFYoAAMDmZVCrBwAAAAAAA08IBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIg
AAAAABRACAQAAACAAgiBAAAAAFAAIRAAAAAACtDW6gEAwOatVmvuaev1/h8PAABKJgQCAP02fMSwDGkf3O/Tjxo1/DWfZmXXmixftmKDP9PfONnf04mTAABsDoRAAKBfarVkSPvg7HnJnelc1d2UY267dVse+NykdNb6jm+bEif7EyaTxuIkAAC0mhAIAGySzlXdTQuBG1PVOAkAAFUgBAIAW5wqxUkAAKgKnxoMAAAAAAUQAgEAAACgAN4aDAAwwPr7acSbclp/rxAAgHUJgQAAA2hTPsU48UnGAAC8foRAAIAB0opPMU58kjEAAL0TAgEABphPMQYAoAp8WAgAAAAAFEAIBAAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAoQFurBwAA0Hy1WnNPW6/3/3gAALw+hEAAgMIMHzEsQ9oH9/v0o0YNf82nWdm1JsuXrej3MQEA2HRCIABAQWq1ZEj74Ox5yZ3pXNXdlGNuu3VbHvjcpHTW+n5lYLNfoZhs/FWKXjUJAGxphEAAgAJ1rupuWgjcmFa8QjHZ8KsUvWoSANgSCYEAALRMK16hmGz4VYpVfdUkAMCmEgIBAGi5Kr1C8WVV3NTftytvKW+fToRSANgUQiAAAGwGNuXtylvK26c3tgkA2DAhEAAAKq5qb1eu4lu6AYCNEwIBAGAzUbW3K1dtDwCwYYNaPQAAAAAAGHheEQgAAGwxmv0BJj5QBYDNiRAIAABsEVrxASY+UAWAzYkQCAAAbPZ8oMrGNwGAEAgAAGwxqvYBJlXbk1Tv7dP9/d/dlNN5SzdQKiEQAACgEFV7+/SmbhqIt097SzewJRMCAQAAClC1t09XcZO3dANbOiEQAACgIFV8u3LVNlVtD8DrZVCrBwAAAAAAA88rAgEAAKDiqvghL8DmRwgEAACACqvih7xU8ZOVxVLYOCEQAAAAKqpqH6iSVPOTlasYS6GKhEAAAACouKp8gEkVP1m5irEUqkoIBAAAAF6TqoTJV6riJqganxoMAAAAAAXwikAAAACAAdDfDzDZUj5QJfHW6aoZsBB422235dprr01XV1cOOuignHzyyQN1KAAAAIBK2ZQPMNlSPlBlY5sSn/bcbAMSAjs6OnL55Zdn7ty5GT58eD75yU/m5z//efbee++BOBwAAABAZVTtA0yq+CEviU97boUBCYH33ntv9txzz4wcOTJJcvDBB2fBggVCYP5yI6jSsZq5p9HjVe08ei0/93qo2nnU6PGcR9Xa5LrdmKptqtqeRo9XtfPotfzc66Fq51Gjx3MeVWuT63ZjqrapansaPV7VzqPX8nOvh6qdR40ez3lUrU3N/v23RC/HyUmzf5YXmxQnt9m6LXeevk/Rn/Zcq9df/1/9hhtuyIsvvpiZM2cmSe677758/etfz3/8x3+83ocCAAAAABowIJ8a3NPTs97Xapvypm8AAAAAYJMMSAgcO3ZsOjo61v77z3/+c8aOHTsQhwIAAAAAGjAgIXDChAl54IEH8txzz6Wrqyv/9V//lYkTJw7EoQAAAACABgzI3whMkttuuy1f/epXs3r16uy3334566yzBuIwAAAAAEADBiwEAgAAAADVMSBvDQYAAAAAqkUIBAAAAIACCIEAAAAAUAAhsIKuuOKKTJkyJQceeGDmzJmz9utdXV059thj8+CDD7Z8z/z58zNt2rRMmzYt//zP/5znn3++qZtedtlll+Xss89+1dfuueee7Lfffk3f0tv59M1vfjNTpkzJlClTctlll6XZf5Kzs7Mz06ZNy1NPPbX2a2effXa+//3vN3XHy2644Ybsv//+mTZtWq6//vpXfa8Vu0477bTsv//+mT59eqZPn5477rijpXte6ZXX7XvvvTeHHnpoDjrooPzjP/5j/vSnPzV1S2/X7dmzZ2e//fZbe9595zvfadmeO++8c+2O6dOnZ6+99spRRx3VtD0vW/f21sr7yTlz5mTq1KmZOnVqzjnnnKxe
vXrt91p13b7rrrty6KGHZvLkybn44ovXfr1Vj21J7/cBjz32WA4//PBMnjw5p5xySlasWNH0Xa+8/S9YsCDTpk3LgQcemLPPPvtVl2Uz9PWcJEm+853v5Oijj27qnt4us1afR8n6t/9WP/4nr74e3XjjjfnABz6w9ny78sorm75n3U0vq9Lj7e9///scffTROeigg3Lccce1/H671dft3m7/v/zlL3P44YfnwAMPzGmnndbUTb2dR61+jtTbc9tWb+rtfrLVm9a9j2zl88i+Nr2sFY9tSe/Pk1r5XLK3Pa1+bOvruWTSustts1WnUu6+++76xz72sXp3d3f9pZdequ+77771J554ov4///M/9Y9+9KP1d77znfUHHnigpXsefPDB+t57711fvHhxvV6v17/85S/XP//5zzdt08vuu++++vve9776WWedtfZrHR0d9cmTJ9f33Xffpm7p7Xx69NFH6x/+8IfrK1asqHd3d9c/+tGP1n/+8583bdN///d/16dOnVrfbbfd6v/3f/9Xf/bZZ+snnHBCfffdd6/PnTu3aTtedu+999YPPPDA+gsvvFDv7u6un3DCCfUf//jHLd31oQ99qL506dJXfa3V51O9/urr9qpVq+p77bVX/fe//329Xq/Xb7rppvqJJ57YtC193Scdc8wx9d/+9rdN27GxPS9bvHhxfdKkSfVHH320qbt6u7216n7y17/+dX3q1Kn1F198sd7T01M/44wz6nPmzGnpdfuPf/xj/QMf+ED9mWeeqa9evbp+5JFH1u++++6WPba9rLf7gIMOOqj+4IMP1uv1ev0rX/lK/Yorrmjqplfe/pctW1Z///vfv/Z69JnPfKZ+4403Nm3Lhm5vjz/+eH3vvfeuf/zjH2/annp9/cus1edRvb7+7f+xxx5r6eN/vb7+c6Rzzjmn/pOf/KSpGza2qWqPtz09PfUPf/jD9Z/97Gf1er1e/9KXvlT/4he/2JQdfd1vV+32/+ijj9bf//73r32MnTlzZv3b3/52U/b0dh597Wtfa+lzpN6e2/7oRz9q6aZ6ff37yVY/l1z3PrJer7fseeSGNtXrrXts6+150rx581r2XLK3Pd/+9rdb+tjW13PJer11l9vmzCsCK2bixImZM2dOBg8enMWLF2fNmjUZNmxYvve97+X444/PHnvs0fI9b3zjG/Ov//qvGTlyZJLkb//2b/PMM880ddeyZcty5ZVX5sQTT3zV188777ycfPLJTd2S9H4+jRgxIrfeemuGDh2aF154IZ2dndluu+2atunmm2/OBRdckNGjRydJ5s2bl0mTJuWAAw5o2oZXeuSRR7L33ntn+PDhGTx4cPbee+/89Kc/bdmupUuXZsmSJfnsZz+badOm5dprr029Xm/5+bTudXv16tU599xz8+Y3vzlJ829vvV23hw4dmkcffTTXXXddpk2blosvvrhprwjo6z7yZV/+8pdz8MEH5x3veEdT9rxs3dtbrVZr2f3kdtttl1mzZmXYsGGp1Wp5xzvekaeffrql1+077rgjU6ZMydixY9Pe3p4rr7wye+yxR8se25Le7wOeeeaZLF++POPHj0+SzJgxIwsWLGjapnVv/9tvv33uvvvujBw5MitWrMjixYub+jjS1+1t9erVOf/883Pqqac2bUvS+2W23XbbtfQ8Sta//b/tbW9r6eN/b8+RHn744dxyyy056KCD8tnPfjYvvPBC0/b0talqj7e//e1vM2zYsHzwgx9Mkpx44on5+Mc/3pQtfd1vV+32/+ijj+bv//7v1z7GnnfeefnQhz7UlD29nUfPPfdcS58j9fbc9kc/+lFLN/V2P9nq55Lr3kfW6/WWPY/sa1OSlj22Jb0/T3rf+97XsueSve2ZMmVKSx/b+nou2crLbXMmBFbQK29sEyZMyJgxY3LWWWflH/7hHyqxZ+zYsZk4cWKS5KWXXsoNN9zQ9G3nn39+Zs6c+ao7n//8z//Mrrvu2pL/oEx6v9za
29vz3e9+N5MmTcqOO+7Y1Dhx6aWX5r3vfe/af3/qU5/KjBkzmnb8de222275xS9+kWXLlmXVqlW566678txzz7Vs1+LFi7PXXnvlsssuy0033ZSFCxdm7ty5LT+f1r1ub7vttpkyZUqSZM2aNbn22mubfntb97q99dZb513velfOOeec/OAHP8jSpUvXe6t3M/eMGTMmSfLUU0/lnnvuyXHHHde0LS9b9/Y2evTolt1PvulNb1obshYvXpzvfOc7mTRpUkuv23/4wx/S09OT4447LtOmTct3v/vdjBgxoqWPbb3dB8ybN2/t9Sn5y+W4aNGipm3q7bGtvb09d911V/bdd98sXbo0H/jAB5q25+Xjr3t7mz17dg477LCMGzeuqVv6ut9u9Xm07u0/SUsf/9e9HvX09GSnnXbKqaeemnnz5mX06NHrvaWq2ZuS1j8vWXfTH//4x+y4444555xzMm3atFxwwQXZZpttmrKlr/vtVl+31739d3R0ZJtttsm//Mu/ZNq0abn66quz/fbbN2VLX+dRK58j9fbc9qWXXmrppt7uJ2+//faWblr3PnLp0qUtfR7Z26YkLXtsS3p/ntTK55J9PW9r5WNbX5taebltzoTAipo5c2buv//+PP3007n55ptbPafXPUuXLs1xxx2XXXfdNYccckjTttxyyy3ZaaedMmHChLVfe+yxx/KTn/wkn/70p5u2oze9nU9HHXVUFi5cmJEjR+baa69t6b5WmjBhQg499NAcffTROf744/Oe97wn7e3tLdvz1re+NVdffXV22GGHDBs2LEcffXTuueeelu1Jer9uv2zlypX5zGc+k56enpxwwglN3/bK6/Ydd9yRr33taxk3blza2tpy3HHHNf286+22dvPNN+ejH/1ohg4d2tQtG9Kq+8nkL2H0mGOOyYwZM/K+972vqcde15o1a/Lzn/88l19+eW655ZY8/PDD+cEPftDSTb3dB9x3333r/VytVmvKng3d/vfbb7888MAD+eAHP5gLL7ywKXte6ZW3t5tuuinPPPNMDjvssKbv2ND9dqvPo9604vG/t+vRoEGDcsMNN2S33XZLrVbLJz/5yabeZ2/out0qvW3q7u7O/fffnyOPPDLz58/PG9/4xnzxi19s6q7e7rdbfd1+5e1/9erV+dnPfpYzzzwzP/zhD7Ny5crccMMNTd3T23nUqudIG3pu26pNG7qfbPVzyZeNHDmy5c8j13Xvvfe27LEt2fDzpFY8l9zQnlb9t21vm2655ZaWXm6bMyGwYh5//PH87ne/S5IMGzYs+++//9p/V2nPn/70pxx55JF517velc9//vNN3bRgwYLce++9mT59eq6++urcdddduemmm9LR0ZHDDjssn/rUp/LnP/85RxxxRNM29XY+3XXXXfnVr36VJGlra8u0adNaelm2WmdnZz70oQ9l/vz5+da3vpWhQ4fmjW98Y8v2PPzww7nrrrvW/runpyeDBw9u2Z6k9+v2xRdfnOeffz7HHntstt5663z1q19takDt7bp95513Zt68eWt/ppnn3YbuI3/6059m6tSpTdnRiFbeTz766KM58sgjc8QRR+Skk05q6rF7M2rUqEyYMCFveMMbMmTIkEyaNCkPPfRQSzf1dh+QJB0dHWu/1tHRkbFjxzZlT2+3/wsuuCD3339/kr8EyYMPPripjyO93d5+/etf5/HHH8/06dNz3nnn5Te/+U1OOeWUpuzp7TJ7/vnnW3oe9eapp55q2eN/b9ejCy+88FV/iL/Zj3d9Pba1Um+b/u3f/i1/9Vd/ld133z1JMnXq1KbeT617v71kyZLK3f5vuOGG7L777vmrv/qrDB48OAcccEBLz6MkLX2O1Ndz21Zu6uv5bSs3revJJ59s2fPIvtx6660te2xL+n6e1Krnkr3tue2221r637a9bfrlL3/Z0sttcyYEVswTTzyRCy+8MF1dXVm9enXuuOOO9V623Oo9u+++e4477rgcccQR
OfPMM5v2aomXzZkzJ7feemvmzZuXU045Jfvtt19mzZqVH//4x5k3b15uuOGGjB49OjfeeGPTNvV1uZ155pnp7OxMT09PbrvttpZelq32pz/9KSeddFK6urqyfPny3HLLLS37u0DJXz6p9JJLLklnZ2dWr16dG2+8sWl/56YvvV23zz333Jx88snZfffdc8UVVzT9iVtv1+0DDjggX/ziF/Pss8+mXq/n29/+dtPOu75ua8uWLcvy5cvX/v2bVlu1alXL7ieXLFmS448/PrNmzarMp6ftu+++ue+++/L8889nzZo1+cUvfpHddtutpZt6uw84/PDDM3To0CxcuDBJMnfu3Oyzzz5N2dPb7f/Tn/50Tj/99LVvT/7Rj37U1MeR3m5ve++9d2677bbMmzcvF198cf7u7/4uV199dVP29HaZzZgxo6XnUW+WLl3assf/3q5HZ511Vq655po8+uijSZJvfetbTX28623Teeed17TjN7rplltuyfPPP59HHnkkSfKzn/0su+66a1P29Ha/3dXVVbnb/0UXXZRHHnlk7SfOtvo8qtfrLX2O1Ndz21Zu6uv5bSs3rau9vb1lzyP7cumll7bssS3p/XnSW9/61pY9l+xtz5577tnS/7btbdO73/3ull5um7O2Vg/g1SZPnpxHHnkk06dPz+DBgzN58uS1f9OhKntWr16dP/7xj/nBD36w9iXCu+66ay699NKW7Wy13s6nT37ykxkyZEg++tGPZtCgQRk/fnyOPfbYVk9tmbe//e2ZOnVqpk+fnjVr1uTYY4/Ne97znpbtefe7352jjjoqM2bMyJo1a7L//vtX6tVkL7vvvvuycOHCLFu2LNOnT0/yl/+P2L//+7835fi9XbcPOeSQDB48OMcdd1y6u7vznve8J//0T//Usj1TpkzJQw89lJ122qkpGxoxb968lt1PfvOb30xnZ2euu+66XHfddUn+8kffZ86cOeDH7ssee+yRT37yk/nYxz6Wrq6utW+naqW+7gN22WWXzJo1K8uXL8+4ceMye/bslm0cM2ZMzjvvvBx//PGp1Wp529velosuuqhpx6/ac5LeLrODDjoobW1tLTuPevPOd74zxxxzTGUe/4cOHZrZs2fn7LPPzqpVq/I3f/M3ueyyy1q2p6qGDBmS6667Lueff35eeumljB49Ol/60peacuy+7rerdvs/5JBDssMOO+Skk07K6tWr8/a3vz1nnHFGU/b0dh4NHTo0v/zlL1v2HKm357YrV65s6fO23u4nd9hhh5ZuWtf/+3//L+ecc05LnkdWVW/Pk9ra2lr2XLK3Pccee2y22mqrlj22VfG55OasVq/X660eAQAAAAAMLG8NBgAAAIACCIEAAAAAUAAhEAAAAAAKIAQCAAAAQAGEQAAAAAAogBAIAAAAAAUQAgEAAACgAEIgAAAAABTg/wNIYfBG3KChAQAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Starbucks store counts per Chinese province, printed and drawn as a bar chart.\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Keep only the rows for China; the result is still a DataFrame.\n",
    "country_cn = df[df['Country'] == 'CN']\n",
    "print(country_cn.head())\n",
    "print(\"-\"*50)\n",
    "\n",
    "# Select 'Brand' before counting so only one column is aggregated (and only once).\n",
    "# count() tallies non-null values; Brand has no missing entries in this dataset,\n",
    "# so the result is the number of stores per province.\n",
    "province_count = country_cn.groupby(\"State/Province\")['Brand'].count()\n",
    "print(province_count)\n",
    "print(\"-\"*50)\n",
    "\n",
    "# Largest provinces first, which is also the left-to-right order of the bars.\n",
    "china_province = province_count.sort_values(ascending=False)\n",
    "print(china_province)\n",
    "print(\"-\"*50)\n",
    "\n",
    "print(type(china_province))\n",
    "print(china_province.index)\n",
    "\n",
    "plt.figure(figsize=(20,10),dpi=80)\n",
    "plt.bar(china_province.index, china_province.values)\n",
    "plt.xlabel('State/Province code')\n",
    "plt.ylabel('Number of stores')\n",
    "plt.title('Starbucks stores in China by province')\n",
    "# show() renders the figure and keeps the BarContainer repr out of the cell output.\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:17.666971400Z",
     "start_time": "2024-05-04T05:58:17.072858400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Brand              object\n",
      "Store Number       object\n",
      "Store Name         object\n",
      "Ownership Type     object\n",
      "Street Address     object\n",
      "City               object\n",
      "State/Province     object\n",
      "Country            object\n",
      "Postcode           object\n",
      "Phone Number       object\n",
      "Timezone           object\n",
      "Longitude         float64\n",
      "Latitude          float64\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(country_cn.dtypes)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:17.853471600Z",
     "start_time": "2024-05-04T05:58:17.671957200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "outputs": [
    {
     "data": {
      "text/plain": "          Brand  Store Number                         Store Name  \\\n3416  Starbucks  26600-234324  Hangzhou Kerry Central NO.1 Store   \n3417  Starbucks  50006-272324   Hangzhou Dahua Xixifengqing-1F2F   \n3418  Starbucks  29588-253266         Hangzhou Tianyang D32 - GF   \n3419  Starbucks  28960-251924     Hangzhou Lanzuantiancheng - GF   \n3420  Starbucks  28955-249430            Ningbo Bali Sunday - GF   \n...         ...           ...                                ...   \n3726  Starbucks  29039-252212                       高速公路金华服务区南区店   \n3727  Starbucks  49055-266870                          金华义乌国际大厦店   \n3728  Starbucks  22749-224344                              金华万达店   \n3729  Starbucks  15314-160109                              金华银泰店   \n3730  Starbucks  47625-258991                          金华义乌万达广场店   \n\n     Ownership Type                                     Street Address  \\\n3416  Joint Venture  intersection Yanan Road and Qingchun Roa, xiac...   \n3417  Joint Venture          West Wenyi Rd&Yongfu Rd, Yu Hang District   \n3418  Joint Venture                             xiao he road, gong shu   \n3419  Joint Venture                             pin shui road, gongshu   \n3420  Joint Venture                NO.555 qian hu south road, yin zhou   \n...             ...                                                ...   
\n3726  Joint Venture                                 浙江省金华市, 义乌市宾王路218号   \n3727  Joint Venture                            浙江省金华市, 国际大厦主楼一层1F-01商铺   \n3728  Joint Venture                                 李渔东路366号, 解放东路168号   \n3729  Joint Venture                   婺城区, 金华银泰天地一楼, 义乌市新科路9号1层1067号商铺   \n3730  Joint Venture                                 浙江省金华市, 长江东路与东二环交口   \n\n          City State/Province Country Postcode   Phone Number  \\\n3416  Hangzhou             33      CN   310004            NaN   \n3417  Hangzhou             33      CN   311121            NaN   \n3418  hangzhou             33      CN  3120000            NaN   \n3419  hangzhou             33      CN   310000            NaN   \n3420  hangzhou             33      CN   310000            NaN   \n...        ...            ...     ...      ...            ...   \n3726       金华市             33      CN   321000            NaN   \n3727       金华市             33      CN   322300            NaN   \n3728       金华市             33      CN   321000  0579-82829003   \n3729       金华市             33      CN   321000  0579-82226035   \n3730       金华市             33      CN   322000            NaN   \n\n                    Timezone  Longitude  Latitude  \n3416  GMT+08:00 Asia/Beijing     120.16     30.26  \n3417  GMT+08:00 Asia/Beijing     120.04     30.28  \n3418  GMT+08:00 Asia/Beijing     120.14     30.31  \n3419  GMT+08:00 Asia/Beijing     120.12     30.31  \n3420  GMT+08:00 Asia/Beijing     121.57     29.83  \n...                      ...        ...       ...  \n3726  GMT+08:00 Asia/Beijing     119.89     29.24  \n3727  GMT+08:00 Asia/Beijing     120.09     29.31  \n3728  GMT+08:00 Asia/Beijing     119.68     29.09  \n3729  GMT+08:00 Asia/Beijing     119.65     29.10  \n3730  GMT+08:00 Asia/Beijing     120.04     29.28  \n\n[315 rows x 13 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Brand</th>\n      <th>Store Number</th>\n      <th>Store Name</th>\n      <th>Ownership Type</th>\n      <th>Street Address</th>\n      <th>City</th>\n      <th>State/Province</th>\n      <th>Country</th>\n      <th>Postcode</th>\n      <th>Phone Number</th>\n      <th>Timezone</th>\n      <th>Longitude</th>\n      <th>Latitude</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>3416</th>\n      <td>Starbucks</td>\n      <td>26600-234324</td>\n      <td>Hangzhou Kerry Central NO.1 Store</td>\n      <td>Joint Venture</td>\n      <td>intersection Yanan Road and Qingchun Roa, xiac...</td>\n      <td>Hangzhou</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>310004</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>120.16</td>\n      <td>30.26</td>\n    </tr>\n    <tr>\n      <th>3417</th>\n      <td>Starbucks</td>\n      <td>50006-272324</td>\n      <td>Hangzhou Dahua Xixifengqing-1F2F</td>\n      <td>Joint Venture</td>\n      <td>West Wenyi Rd&amp;Yongfu Rd, Yu Hang District</td>\n      <td>Hangzhou</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>311121</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>120.04</td>\n      <td>30.28</td>\n    </tr>\n    <tr>\n      <th>3418</th>\n      <td>Starbucks</td>\n      <td>29588-253266</td>\n      <td>Hangzhou Tianyang D32 - GF</td>\n      <td>Joint Venture</td>\n      <td>xiao he road, gong shu</td>\n      <td>hangzhou</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>3120000</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>120.14</td>\n      
<td>30.31</td>\n    </tr>\n    <tr>\n      <th>3419</th>\n      <td>Starbucks</td>\n      <td>28960-251924</td>\n      <td>Hangzhou Lanzuantiancheng - GF</td>\n      <td>Joint Venture</td>\n      <td>pin shui road, gongshu</td>\n      <td>hangzhou</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>310000</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>120.12</td>\n      <td>30.31</td>\n    </tr>\n    <tr>\n      <th>3420</th>\n      <td>Starbucks</td>\n      <td>28955-249430</td>\n      <td>Ningbo Bali Sunday - GF</td>\n      <td>Joint Venture</td>\n      <td>NO.555 qian hu south road, yin zhou</td>\n      <td>hangzhou</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>310000</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>121.57</td>\n      <td>29.83</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>3726</th>\n      <td>Starbucks</td>\n      <td>29039-252212</td>\n      <td>高速公路金华服务区南区店</td>\n      <td>Joint Venture</td>\n      <td>浙江省金华市, 义乌市宾王路218号</td>\n      <td>金华市</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>321000</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>119.89</td>\n      <td>29.24</td>\n    </tr>\n    <tr>\n      <th>3727</th>\n      <td>Starbucks</td>\n      <td>49055-266870</td>\n      <td>金华义乌国际大厦店</td>\n      <td>Joint Venture</td>\n      <td>浙江省金华市, 国际大厦主楼一层1F-01商铺</td>\n      <td>金华市</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>322300</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>120.09</td>\n      <td>29.31</td>\n    </tr>\n    <tr>\n      <th>3728</th>\n      <td>Starbucks</td>\n      <td>22749-224344</td>\n      
<td>金华万达店</td>\n      <td>Joint Venture</td>\n      <td>李渔东路366号, 解放东路168号</td>\n      <td>金华市</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>321000</td>\n      <td>0579-82829003</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>119.68</td>\n      <td>29.09</td>\n    </tr>\n    <tr>\n      <th>3729</th>\n      <td>Starbucks</td>\n      <td>15314-160109</td>\n      <td>金华银泰店</td>\n      <td>Joint Venture</td>\n      <td>婺城区, 金华银泰天地一楼, 义乌市新科路9号1层1067号商铺</td>\n      <td>金华市</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>321000</td>\n      <td>0579-82226035</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>119.65</td>\n      <td>29.10</td>\n    </tr>\n    <tr>\n      <th>3730</th>\n      <td>Starbucks</td>\n      <td>47625-258991</td>\n      <td>金华义乌万达广场店</td>\n      <td>Joint Venture</td>\n      <td>浙江省金华市, 长江东路与东二环交口</td>\n      <td>金华市</td>\n      <td>33</td>\n      <td>CN</td>\n      <td>322000</td>\n      <td>NaN</td>\n      <td>GMT+08:00 Asia/Beijing</td>\n      <td>120.04</td>\n      <td>29.28</td>\n    </tr>\n  </tbody>\n</table>\n<p>315 rows × 13 columns</p>\n</div>"
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 33号地区的样本信息\n",
    "country_cn[country_cn['State/Province']=='33']  # DataFrame"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-05-04T05:58:17.854469200Z",
     "start_time": "2024-05-04T05:58:17.703873100Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 多级分组"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "outputs": [
    {
     "data": {
      "text/plain": "           Brand  Store Number        Store Name Ownership Type  \\\n0      Starbucks  47370-257954     Meritxell, 96       Licensed   \n1      Starbucks  22331-212325  Ajman Drive Thru       Licensed   \n2      Starbucks  47089-256771         Dana Mall       Licensed   \n3      Starbucks  22126-218024        Twofour 54       Licensed   \n4      Starbucks  17127-178586      Al Ain Tower       Licensed   \n...          ...           ...               ...            ...   \n25595  Starbucks  21401-212072               Rex       Licensed   \n25596  Starbucks  24010-226985          Panorama       Licensed   \n25597  Starbucks  47608-253804     Rosebank Mall       Licensed   \n25598  Starbucks  47640-253809      Menlyn Maine       Licensed   \n25599  Starbucks  47609-253286    Mall of Africa       Licensed   \n\n                                          Street Address  \\\n0                                      Av. Meritxell, 96   \n1                                   1 Street 69, Al Jarf   \n2                           Sheikh Khalifa Bin Zayed St.   \n3                                        Al Salam Street   \n4                        Khaldiya Area, Abu Dhabi Island   \n...                                                  ...   \n25595  141 Nguyễn Huệ, Quận 1, Góc đường Pasteur và L...   \n25596  SN-44, Tòa Nhà Panorama, 208 Trần Văn Trà, Quận 7   \n25597          Cnr Tyrwhitt and Cradock Avenue, Rosebank   \n25598  Shop 61B, Central Square, Cnr Aramist & Coroba...   
\n25599             Shop 2077, Upper Level, Waterfall City   \n\n                        City State/Province Country Postcode  Phone Number  \\\n0           Andorra la Vella              7      AD    AD500     376818720   \n1                      Ajman             AJ      AE      NaN           NaN   \n2                      Ajman             AJ      AE      NaN           NaN   \n3                  Abu Dhabi             AZ      AE      NaN           NaN   \n4                  Abu Dhabi             AZ      AE      NaN           NaN   \n...                      ...            ...     ...      ...           ...   \n25595  Thành Phố Hồ Chí Minh             SG      VN    70000  08 3824 4668   \n25596  Thành Phố Hồ Chí Minh             SG      VN    70000  08 5413 8292   \n25597           Johannesburg             GT      ZA     2194   27873500159   \n25598                 Menlyn             GT      ZA      181           NaN   \n25599                Midrand             GT      ZA     1682   27873500215   \n\n                             Timezone  Longitude  Latitude  \n0             GMT+1:00 Europe/Andorra       1.53     42.51  \n1                GMT+04:00 Asia/Dubai      55.47     25.42  \n2                GMT+04:00 Asia/Dubai      55.47     25.39  \n3                GMT+04:00 Asia/Dubai      54.38     24.48  \n4                GMT+04:00 Asia/Dubai      54.54     24.51  \n...                               ...        ...       ...  \n25595          GMT+000000 Asia/Saigon     106.70     10.78  \n25596          GMT+000000 Asia/Saigon     106.71     10.72  \n25597  GMT+000000 Africa/Johannesburg      28.04    -26.15  \n25598  GMT+000000 Africa/Johannesburg      28.28    -25.79  \n25599  GMT+000000 Africa/Johannesburg      28.11    -26.02  \n\n[25600 rows x 13 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Brand</th>\n      <th>Store Number</th>\n      <th>Store Name</th>\n      <th>Ownership Type</th>\n      <th>Street Address</th>\n      <th>City</th>\n      <th>State/Province</th>\n      <th>Country</th>\n      <th>Postcode</th>\n      <th>Phone Number</th>\n      <th>Timezone</th>\n      <th>Longitude</th>\n      <th>Latitude</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Starbucks</td>\n      <td>47370-257954</td>\n      <td>Meritxell, 96</td>\n      <td>Licensed</td>\n      <td>Av. Meritxell, 96</td>\n      <td>Andorra la Vella</td>\n      <td>7</td>\n      <td>AD</td>\n      <td>AD500</td>\n      <td>376818720</td>\n      <td>GMT+1:00 Europe/Andorra</td>\n      <td>1.53</td>\n      <td>42.51</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Starbucks</td>\n      <td>22331-212325</td>\n      <td>Ajman Drive Thru</td>\n      <td>Licensed</td>\n      <td>1 Street 69, Al Jarf</td>\n      <td>Ajman</td>\n      <td>AJ</td>\n      <td>AE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>GMT+04:00 Asia/Dubai</td>\n      <td>55.47</td>\n      <td>25.42</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Starbucks</td>\n      <td>47089-256771</td>\n      <td>Dana Mall</td>\n      <td>Licensed</td>\n      <td>Sheikh Khalifa Bin Zayed St.</td>\n      <td>Ajman</td>\n      <td>AJ</td>\n      <td>AE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>GMT+04:00 Asia/Dubai</td>\n      <td>55.47</td>\n      <td>25.39</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Starbucks</td>\n      <td>22126-218024</td>\n      <td>Twofour 54</td>\n      
<td>Licensed</td>\n      <td>Al Salam Street</td>\n      <td>Abu Dhabi</td>\n      <td>AZ</td>\n      <td>AE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>GMT+04:00 Asia/Dubai</td>\n      <td>54.38</td>\n      <td>24.48</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Starbucks</td>\n      <td>17127-178586</td>\n      <td>Al Ain Tower</td>\n      <td>Licensed</td>\n      <td>Khaldiya Area, Abu Dhabi Island</td>\n      <td>Abu Dhabi</td>\n      <td>AZ</td>\n      <td>AE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>GMT+04:00 Asia/Dubai</td>\n      <td>54.54</td>\n      <td>24.51</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>25595</th>\n      <td>Starbucks</td>\n      <td>21401-212072</td>\n      <td>Rex</td>\n      <td>Licensed</td>\n      <td>141 Nguyễn Huệ, Quận 1, Góc đường Pasteur và L...</td>\n      <td>Thành Phố Hồ Chí Minh</td>\n      <td>SG</td>\n      <td>VN</td>\n      <td>70000</td>\n      <td>08 3824 4668</td>\n      <td>GMT+000000 Asia/Saigon</td>\n      <td>106.70</td>\n      <td>10.78</td>\n    </tr>\n    <tr>\n      <th>25596</th>\n      <td>Starbucks</td>\n      <td>24010-226985</td>\n      <td>Panorama</td>\n      <td>Licensed</td>\n      <td>SN-44, Tòa Nhà Panorama, 208 Trần Văn Trà, Quận 7</td>\n      <td>Thành Phố Hồ Chí Minh</td>\n      <td>SG</td>\n      <td>VN</td>\n      <td>70000</td>\n      <td>08 5413 8292</td>\n      <td>GMT+000000 Asia/Saigon</td>\n      <td>106.71</td>\n      <td>10.72</td>\n    </tr>\n    <tr>\n      <th>25597</th>\n      <td>Starbucks</td>\n      <td>47608-253804</td>\n      <td>Rosebank Mall</td>\n      <td>Licensed</td>\n      <td>Cnr Tyrwhitt and Cradock Avenue, Rosebank</td>\n      
<td>Johannesburg</td>\n      <td>GT</td>\n      <td>ZA</td>\n      <td>2194</td>\n      <td>27873500159</td>\n      <td>GMT+000000 Africa/Johannesburg</td>\n      <td>28.04</td>\n      <td>-26.15</td>\n    </tr>\n    <tr>\n      <th>25598</th>\n      <td>Starbucks</td>\n      <td>47640-253809</td>\n      <td>Menlyn Maine</td>\n      <td>Licensed</td>\n      <td>Shop 61B, Central Square, Cnr Aramist &amp; Coroba...</td>\n      <td>Menlyn</td>\n      <td>GT</td>\n      <td>ZA</td>\n      <td>181</td>\n      <td>NaN</td>\n      <td>GMT+000000 Africa/Johannesburg</td>\n      <td>28.28</td>\n      <td>-25.79</td>\n    </tr>\n    <tr>\n      <th>25599</th>\n      <td>Starbucks</td>\n      <td>47609-253286</td>\n      <td>Mall of Africa</td>\n      <td>Licensed</td>\n      <td>Shop 2077, Upper Level, Waterfall City</td>\n      <td>Midrand</td>\n      <td>GT</td>\n      <td>ZA</td>\n      <td>1682</td>\n      <td>27873500215</td>\n      <td>GMT+000000 Africa/Johannesburg</td>\n      <td>28.11</td>\n      <td>-26.02</td>\n    </tr>\n  </tbody>\n</table>\n<p>25600 rows × 13 columns</p>\n</div>"
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T06:45:28.098089300Z",
     "start_time": "2024-05-04T06:45:28.062264200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0        Starbucks\n",
      "1        Starbucks\n",
      "2        Starbucks\n",
      "3        Starbucks\n",
      "4        Starbucks\n",
      "           ...    \n",
      "25595    Starbucks\n",
      "25596    Starbucks\n",
      "25597    Starbucks\n",
      "25598    Starbucks\n",
      "25599    Starbucks\n",
      "Name: Brand, Length: 25600, dtype: object\n",
      "<class 'pandas.core.series.Series'>\n",
      "--------------------------------------------------\n",
      "Country  State/Province\n",
      "AD       7                  1\n",
      "AE       AJ                 2\n",
      "         AZ                48\n",
      "         DU                82\n",
      "         FU                 2\n",
      "                           ..\n",
      "US       WV                25\n",
      "         WY                23\n",
      "VN       HN                 6\n",
      "         SG                19\n",
      "ZA       GT                 3\n",
      "Name: Brand, Length: 545, dtype: int64\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(df.loc[:,\"Brand\"])\n",
    "print(type(df.loc[:,\"Brand\"]))  # Series\n",
    "#print(type(df[\"Brand\"]))    # Series\n",
    "print(\"-\"*50)\n",
    "#print(df[\"Brand\"].groupby(by=[df[\"Country\"], df[\"State/Province\"]]).count())    # 一样，Series\n",
    "print(df.loc[:,\"Brand\"].groupby(by=[df[\"Country\"], df[\"State/Province\"]]).count())\n",
    "print(type(df.loc[:,\"Brand\"].groupby(by=[df[\"Country\"], df[\"State/Province\"]]).count()))    # Series\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T06:45:50.758354Z",
     "start_time": "2024-05-04T06:45:50.718346800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 所以当只提出一列时，要保证聚合结果仍是df，则索引用 df【【列】】"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           Brand\n",
      "0      Starbucks\n",
      "1      Starbucks\n",
      "2      Starbucks\n",
      "3      Starbucks\n",
      "4      Starbucks\n",
      "...          ...\n",
      "25595  Starbucks\n",
      "25596  Starbucks\n",
      "25597  Starbucks\n",
      "25598  Starbucks\n",
      "25599  Starbucks\n",
      "\n",
      "[25600 rows x 1 columns]\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "--------------------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": "                        Brand\nCountry State/Province       \nAD      7                   1\nAE      AJ                  2\n        AZ                 48\n        DU                 82\n        FU                  2\n...                       ...\nUS      WV                 25\n        WY                 23\nVN      HN                  6\n        SG                 19\nZA      GT                  3\n\n[545 rows x 1 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th>Brand</th>\n    </tr>\n    <tr>\n      <th>Country</th>\n      <th>State/Province</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>AD</th>\n      <th>7</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th rowspan=\"4\" valign=\"top\">AE</th>\n      <th>AJ</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>AZ</th>\n      <td>48</td>\n    </tr>\n    <tr>\n      <th>DU</th>\n      <td>82</td>\n    </tr>\n    <tr>\n      <th>FU</th>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <th>...</th>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">US</th>\n      <th>WV</th>\n      <td>25</td>\n    </tr>\n    <tr>\n      <th>WY</th>\n      <td>23</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">VN</th>\n      <th>HN</th>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>SG</th>\n      <td>19</td>\n    </tr>\n    <tr>\n      <th>ZA</th>\n      <th>GT</th>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n<p>545 rows × 1 columns</p>\n</div>"
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(df[[\"Brand\"]])\n",
    "print(type(df[[\"Brand\"]]))  # Dataframe\n",
    "print(\"-\"*50)\n",
    "\n",
    "grouped1 = df[[\"Brand\"]].groupby(by=[df[\"Country\"], df[\"State/Province\"]]).count()\n",
    "grouped1    # Dataframe"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T06:43:25.445327Z",
     "start_time": "2024-05-04T06:43:25.374802100Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 当提取两列或更多时，分组完聚合都是df"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "outputs": [
    {
     "data": {
      "text/plain": "                        Brand  Store Number  Store Name  Ownership Type  \\\nCountry State/Province                                                    \nAD      7                   1             1           1               1   \nAE      AJ                  2             2           2               2   \n        AZ                 48            48          48              48   \n        DU                 82            82          82              82   \n        FU                  2             2           2               2   \n...                       ...           ...         ...             ...   \nUS      WV                 25            25          25              25   \n        WY                 23            23          23              23   \nVN      HN                  6             6           6               6   \n        SG                 19            19          19              19   \nZA      GT                  3             3           3               3   \n\n                        Street Address  City  Postcode  Phone Number  \\\nCountry State/Province                                                 \nAD      7                            1     1         1             1   \nAE      AJ                           2     2         0             0   \n        AZ                          48    48         7            20   \n        DU                          82    82        16            50   \n        FU                           2     2         1             0   \n...                                ...   ...       ...           ...   
\nUS      WV                          25    25        25            23   \n        WY                          23    23        23            22   \nVN      HN                           6     6         6             6   \n        SG                          19    19        19            17   \nZA      GT                           3     3         3             2   \n\n                        Timezone  Longitude  Latitude  \nCountry State/Province                                 \nAD      7                      1          1         1  \nAE      AJ                     2          2         2  \n        AZ                    48         48        48  \n        DU                    82         82        82  \n        FU                     2          2         2  \n...                          ...        ...       ...  \nUS      WV                    25         25        25  \n        WY                    23         23        23  \nVN      HN                     6          6         6  \n        SG                    19         19        19  \nZA      GT                     3          3         3  \n\n[545 rows x 11 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th>Brand</th>\n      <th>Store Number</th>\n      <th>Store Name</th>\n      <th>Ownership Type</th>\n      <th>Street Address</th>\n      <th>City</th>\n      <th>Postcode</th>\n      <th>Phone Number</th>\n      <th>Timezone</th>\n      <th>Longitude</th>\n      <th>Latitude</th>\n    </tr>\n    <tr>\n      <th>Country</th>\n      <th>State/Province</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>AD</th>\n      <th>7</th>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th rowspan=\"4\" valign=\"top\">AE</th>\n      <th>AJ</th>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>AZ</th>\n      <td>48</td>\n      <td>48</td>\n      <td>48</td>\n      <td>48</td>\n      <td>48</td>\n      <td>48</td>\n      <td>7</td>\n      <td>20</td>\n      <td>48</td>\n      <td>48</td>\n      <td>48</td>\n    </tr>\n    <tr>\n      <th>DU</th>\n      <td>82</td>\n      <td>82</td>\n      <td>82</td>\n      <td>82</td>\n      <td>82</td>\n      <td>82</td>\n      <td>16</td>\n      <td>50</td>\n      <td>82</td>\n      <td>82</td>\n      
<td>82</td>\n    </tr>\n    <tr>\n      <th>FU</th>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>2</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">US</th>\n      <th>WV</th>\n      <td>25</td>\n      <td>25</td>\n      <td>25</td>\n      <td>25</td>\n      <td>25</td>\n      <td>25</td>\n      <td>25</td>\n      <td>23</td>\n      <td>25</td>\n      <td>25</td>\n      <td>25</td>\n    </tr>\n    <tr>\n      <th>WY</th>\n      <td>23</td>\n      <td>23</td>\n      <td>23</td>\n      <td>23</td>\n      <td>23</td>\n      <td>23</td>\n      <td>23</td>\n      <td>22</td>\n      <td>23</td>\n      <td>23</td>\n      <td>23</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">VN</th>\n      <th>HN</th>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>SG</th>\n      <td>19</td>\n      <td>19</td>\n      <td>19</td>\n      <td>19</td>\n      <td>19</td>\n      <td>19</td>\n      <td>19</td>\n      <td>17</td>\n      <td>19</td>\n      <td>19</td>\n      <td>19</td>\n    </tr>\n    <tr>\n      <th>ZA</th>\n      <th>GT</th>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n      <td>2</td>\n      <td>3</td>\n      <td>3</td>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n<p>545 rows × 11 columns</p>\n</div>"
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped2 = df.groupby(by=[df[\"Country\"], df[\"State/Province\"]]).count()\n",
    "grouped2    # Dataframe"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T06:41:06.333891800Z",
     "start_time": "2024-05-04T06:41:06.280313800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "outputs": [
    {
     "data": {
      "text/plain": "                        Brand  Store Name\nCountry State/Province                   \nAD      7                   1           1\nAE      AJ                  2           2\n        AZ                 48          48\n        DU                 82          82\n        FU                  2           2\n...                       ...         ...\nUS      WV                 25          25\n        WY                 23          23\nVN      HN                  6           6\n        SG                 19          19\nZA      GT                  3           3\n\n[545 rows x 2 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th></th>\n      <th>Brand</th>\n      <th>Store Name</th>\n    </tr>\n    <tr>\n      <th>Country</th>\n      <th>State/Province</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>AD</th>\n      <th>7</th>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th rowspan=\"4\" valign=\"top\">AE</th>\n      <th>AJ</th>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>AZ</th>\n      <td>48</td>\n      <td>48</td>\n    </tr>\n    <tr>\n      <th>DU</th>\n      <td>82</td>\n      <td>82</td>\n    </tr>\n    <tr>\n      <th>FU</th>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">US</th>\n      <th>WV</th>\n      <td>25</td>\n      <td>25</td>\n    </tr>\n    <tr>\n      <th>WY</th>\n      <td>23</td>\n      <td>23</td>\n    </tr>\n    <tr>\n      <th rowspan=\"2\" valign=\"top\">VN</th>\n      <th>HN</th>\n      <td>6</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th>SG</th>\n      <td>19</td>\n      <td>19</td>\n    </tr>\n    <tr>\n      <th>ZA</th>\n      <th>GT</th>\n      <td>3</td>\n      <td>3</td>\n    </tr>\n  </tbody>\n</table>\n<p>545 rows × 2 columns</p>\n</div>"
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped3 = df.loc[:,[\"Brand\", \"Store Name\"]].groupby(by=[df[\"Country\"], df[\"State/Province\"]]).count()\n",
    "grouped3    # DataFrame"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T06:49:18.027981100Z",
     "start_time": "2024-05-04T06:49:17.937545400Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### groupby得到分组对象，正常来说应该直接聚合\n",
    "### 但是这里对分组对象先提取特征再聚合、先聚合再提取特征、先提取特征再分组再聚合，三者结果一样"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000025ACDC0C400>\n"
     ]
    },
    {
     "data": {
      "text/plain": "Brand\nCoffee House Holdings        1\nEvolution Fresh              2\nStarbucks                25249\nTeavana                    348\nName: Store Name, dtype: int64"
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped4 = df.groupby(\"Brand\")['Store Name'].count()\n",
    "print(df.groupby(\"Brand\")['Store Name'])\n",
    "grouped4"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T07:05:26.157256800Z",
     "start_time": "2024-05-04T07:05:26.131757700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "outputs": [
    {
     "data": {
      "text/plain": "Brand\nCoffee House Holdings        1\nEvolution Fresh              2\nStarbucks                25249\nTeavana                    348\nName: Store Name, dtype: int64"
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped4 = df.groupby(\"Brand\").count()[\"Store Name\"]\n",
    "grouped4"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T07:05:27.622354400Z",
     "start_time": "2024-05-04T07:05:27.516791700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "outputs": [
    {
     "data": {
      "text/plain": "Brand\nCoffee House Holdings        1\nEvolution Fresh              2\nStarbucks                25249\nTeavana                    348\nName: Store Name, dtype: int64"
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped4 = df[\"Store Name\"].groupby(df[\"Brand\"]).count()\n",
    "grouped4"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T07:05:29.199412600Z",
     "start_time": "2024-05-04T07:05:29.141925300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
